home *** CD-ROM | disk | FTP | other *** search
- ///-*-C++-*-//////////////////////////////////////////////////////////////////
- //
- // Hoard: A Fast, Scalable, and Memory-Efficient Allocator
- // for Shared-Memory Multiprocessors
- // Contact author: Emery Berger, http://www.cs.utexas.edu/users/emery
- //
- // Copyright (c) 1998-2000, The University of Texas at Austin.
- //
- // This library is free software; you can redistribute it and/or modify
- // it under the terms of the GNU Library General Public License as
- // published by the Free Software Foundation, http://www.fsf.org.
- //
- // This library is distributed in the hope that it will be useful, but
- // WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- // Library General Public License for more details.
- //
- //////////////////////////////////////////////////////////////////////////////
-
-
- //////////////////////////////////////////////////////////////////////////////
- //
- // Note: This file was modified by Crystal Decisions in June 2002.
- //
- //////////////////////////////////////////////////////////////////////////////
-
-
- #include <assert.h>
-
- #include "arch-specific.h"
-
- #if defined(CRYSTAL_HOARD) && defined(__SVR4)
- // The Solaris version of the Crystal Hoard will call malloc instead of mmap.
- // This is a bit of an experiment. mmap gets very slow when managing many
- // small allocations, which we need to do if we want to release blocks.
- #include <dlfcn.h>
- #endif
-
- // How many iterations we spin waiting for a lock.
- enum { SPIN_LIMIT = 100 };
-
- // Crystal will define this in arch-specific.h
- #if !defined(CRYSTAL_HOARD)
- // The values of a user-level lock.
- enum { UNLOCKED = 0, LOCKED = 1 };
- #endif
-
- extern "C" {
-
- #if defined(__BEOS__)
-
- #include <OS.h>
- #include <unistd.h>
-
- void hoardCreateThread (hoardThreadType &t,
- void *( *function)( void *),
- void *arg)
- {
- t = spawn_thread((int32 (*)(void*))function, "some thread",
- B_NORMAL_PRIORITY, arg);
- if (t >= B_OK) resume_thread(t);
- else debugger("spawn_thread() failed!");
- }
-
-
- void hoardJoinThread (hoardThreadType &t)
- {
- status_t dummy;
- wait_for_thread(t, &dummy);
- }
-
-
- void hoardSetConcurrency (int)
- {
- }
-
-
- int hoardGetThreadID (void)
- {
- return find_thread(0);
- }
-
-
- void hoardLockInit (hoardLockType &lock)
- {
- lock.ben = 0;
- lock.sem = create_sem(0, "a hoard lock");
- }
-
-
- void hoardLock (hoardLockType &lock)
- {
- if((atomic_add(&(lock.ben), 1)) >= 1) acquire_sem(lock.sem);
- }
-
-
- void hoardUnlock (hoardLockType &lock)
- {
- if((atomic_add(&(lock.ben), -1)) > 1) release_sem(lock.sem);
- }
-
- int hoardGetPageSize (void)
- {
- return B_PAGE_SIZE;
- }
-
- int hoardGetNumProcessors (void)
- {
- system_info si;
- status_t result = get_system_info(&si);
- assert (result == B_OK);
- return si.cpu_count;
- }
-
-
- void * hoardSbrk (long size)
- {
- return sbrk(size + hoardHeap::ALIGNMENT - 1);
- }
-
-
- void hoardUnsbrk (void * ptr, long size)
- {
- // NOT CURRENTLY IMPLEMENTED!
- }
-
- void hoardYield (void)
- {
- }
-
- unsigned long hoardInterlockedExchange (unsigned long * oldval,
- unsigned long newval)
- {
- // This *should* be made atomic. It's never used in the BeOS
- // version, so this is included strictly for completeness.
- unsigned long o = *oldval;
- *oldval = newval;
- return o;
- }
-
-
- #elif !defined(WIN32) // UNIX
-
-
- #if USE_SPROC
- #include <sys/types.h>
- #include <sys/wait.h>
- #include <unistd.h>
- #include <ulocks.h>
- #endif
-
- void hoardCreateThread (hoardThreadType& t,
- void *(*function) (void *),
- void * arg)
- {
- #if USE_SPROC
- typedef void (*sprocFunction) (void *);
- t = sproc ((sprocFunction) function, PR_SADDR, arg);
- #else
- pthread_attr_t attr;
- pthread_attr_init (&attr);
- #if defined(_AIX)
- // Bound (kernel-level) threads.
- pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
- #endif
- pthread_create (&t, &attr, function, arg);
- #endif
- }
-
- void hoardJoinThread (hoardThreadType& t)
- {
- #if USE_SPROC
- waitpid (t, 0, 0);
- #else
- pthread_join (t, NULL);
- #endif
- }
-
- #if defined(__linux)
- // This extern declaration is required for Linux.
- extern "C" void pthread_setconcurrency (int n);
- #endif
-
- void hoardSetConcurrency (int n)
- {
- #if USE_SPROC
- usconfig (CONF_INITUSERS, n);
- #elif defined(__SVR4) // Solaris
- thr_setconcurrency (n);
- #else
- pthread_setconcurrency (n);
- #endif
- }
-
-
- #if defined(__SVR4) // Solaris
-
- // Solaris's two-level threads model gives us an edge here;
- // we can hash on the LWP's id. This helps us in two ways:
- // (1) there are likely to be far fewer LWP's than threads,
- // (2) if there's a one-to-one correspondence between LWP's
- // and the number of processors (usually the case), then
- // the number of heaps used will be the same as the number
- // of processors (the optimal case).
- // Here we rely on an undocumented call in libthread.so, which
- // turns out to be MUCH cheaper than the documented _lwp_self(). Go figure.
-
- #if defined(CRYSTAL_HOARD)
- // Crystal will use the undocumented _lwp_self(), only if it exists
- // in the process' symbol space. Otherwise, it will use the documented
- // version. The allows us to LD_PRELOAD the Hoard into single-threaded apps.
- // Not much to be gained by this, except that now you don't have to worry
- // about your LD_PRELOAD so much.
- #include <sys/lwp.h>
- typedef unsigned int (*lwp_self_func)(void);
- #endif
- extern "C" unsigned int lwp_self(void);
- #endif
-
- int hoardGetThreadID (void) {
- #if USE_SPROC
- // This hairiness has the same effect as calling getpid(),
- // but it's MUCH faster since it avoids making a system call
- // and just accesses the sproc-local data directly.
- int pid = (int) PRDA->sys_prda.prda_sys.t_pid;
- return pid;
- #else
- #if defined(__linux)
- // Consecutive thread id's in Linux are 1024 apart;
- // dividing off the 1024 gives us an appropriate thread id.
- return (int) pthread_self() >> 10; // (>> 10 = / 1024)
- #endif
- #if defined(__SVR4)
-
- #if defined(CRYSTAL_HOARD)
- // Crystal will use the undocumented lwp_self() function only if
- // it exists in the process' symbol space.
- static lwp_self_func undocumented_lwp_self = (lwp_self_func)dlsym(RTLD_DEFAULT, "lwp_self");
- if ( undocumented_lwp_self )
- return (int)undocumented_lwp_self();
- else
- return (int) _lwp_self();
- #else
- return (int) lwp_self();
- #endif
- #endif
- return (int) pthread_self();
- #endif
- }
-
-
- // If we are using either Intel or SPARC,
- // we use our own lock implementation
- // (spin then yield). This is much cheaper than
- // the ordinary mutex, at least on Linux and Solaris.
-
- #if USER_LOCKS && (defined(i386) || defined(sparc) || defined(__sgi) || defined(ppc))
-
- #include <sched.h>
-
- #if defined(__sgi)
- #include <mutex.h>
- #endif
-
-
- // Atomically:
- // retval = *oldval;
- // *oldval = newval;
- // return retval;
-
- #if defined(sparc) && !defined(__GNUC__)
- extern "C" unsigned long InterlockedExchange (unsigned long * oldval,
- unsigned long newval);
- #else
- static inline unsigned long InterlockedExchange (unsigned long * oldval,
- unsigned long newval)
- {
- #if defined(sparc)
- asm volatile ("swap [%1],%0"
- :"=r" (newval)
- :"r" (oldval), "0" (newval)
- : "memory");
-
- #endif
- #if defined(i386)
- asm volatile ("xchgl %0, %1"
- : "=r" (newval)
- : "m" (*oldval), "0" (newval)
- : "memory");
- #endif
- #if defined(__sgi)
- newval = test_and_set (oldval, newval);
- #endif
- #if defined(ppc)
- int ret;
- asm volatile ("sync;"
- "0: lwarx %0,0,%1 ;"
- " xor. %0,%3,%0;"
- " bne 1f;"
- " stwcx. %2,0,%1;"
- " bne- 0b;"
- "1: sync"
- : "=&r"(ret)
- : "r"(oldval), "r"(newval), "r"(*oldval)
- : "cr0", "memory");
- #endif
- return newval;
- }
- #endif
-
- unsigned long hoardInterlockedExchange (unsigned long * oldval,
- unsigned long newval)
- {
- return InterlockedExchange (oldval, newval);
- }
-
- void hoardLockInit (hoardLockType& mutex) {
- InterlockedExchange (&mutex, UNLOCKED);
- }
-
- #include <stdio.h>
-
- void hoardLock (hoardLockType& mutex) {
- // A yielding lock (with an initial spin).
- int i;
- while (1) {
- i = 0;
- while (i < SPIN_LIMIT) {
- if (InterlockedExchange (&mutex, LOCKED) == UNLOCKED) {
- // We got the lock.
- return;
- }
- i++;
- }
- // The lock is still being held by someone else.
- // Give up our quantum.
- hoardYield();
- }
- }
-
- void hoardUnlock (hoardLockType& mutex) {
- InterlockedExchange (&mutex, UNLOCKED);
- }
-
- #else
-
- // use non-user-level locks.
-
- #endif // USER_LOCKS
-
- #if defined(__SVR4)
- #include <thread.h>
- #endif
-
- void hoardYield (void)
- {
- #if defined(__SVR4)
- thr_yield();
- #else
- sched_yield();
- #endif
- }
-
- #if 1 // !(defined(__sgi))
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <unistd.h>
- #include <sys/mman.h>
- #endif
-
-
- #if defined(CRYSTAL_HOARD) && defined(__SVR4)
- void * hoardSbrk (long size)
- {
- // Try to grab the next nearest "malloc". This trick will allow other memory allocators to be
- // super-imposed with us. If we're not sitting on any other memory allocators, this will pick
- // up the libc.so.1 malloc. If THAT fails, we'll use mmap.
- typedef void * (*mallocFunc)(size_t size);
- static mallocFunc libc_malloc = (mallocFunc)dlsym(RTLD_NEXT, "malloc");
-
- if ( libc_malloc )
- return libc_malloc(size);
- else
- {
- // Use mmap to get memory from the backing store.
- static int fd = ::open ("/dev/zero", O_RDWR);
- char * ptr = (char *) mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
- if (ptr == (char *) MAP_FAILED)
- return NULL;
- else
- return ptr;
- }
- }
-
- void hoardUnsbrk (void * ptr, long size)
- {
- // Try to grab the next nearest "free". This trick will allow other memory allocators to be
- // super-imposed with us. If we're not sitting on any other memory allocators, this will pick
- // up the libc.so.1 free. If THAT fails, we'll use munmap.
- typedef void (*freeFunc)(void * ptr);
- static freeFunc libc_free = (freeFunc)dlsym(RTLD_NEXT, "free");
-
- if ( libc_free )
- libc_free(ptr);
- else
- {
- int result = munmap ((char *) ptr, size);
- assert(result == 0);
- }
- }
-
- #else // CRYSTAL_HOARD && __SVR4
-
- void * hoardSbrk (long size) {
- #if defined(linux)
- #if USER_LOCKS
- static hoardLockType sbrkLock = 0;
- #else
- static hoardLockType sbrkLock = PTHREAD_MUTEX_INITIALIZER;
- #endif
- #endif
- // Use mmap to get memory from the backing store.
- static int fd = ::open ("/dev/zero", O_RDWR);
- #if defined(linux)
- hoardLock (sbrkLock);
- #endif
- char * ptr = (char *) mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
- char * retPtr;
- if (ptr == (char *) MAP_FAILED) {
- retPtr = NULL;
- } else {
- retPtr = ptr;
- }
- #if defined(linux)
- hoardUnlock (sbrkLock);
- #endif
- return retPtr;
- }
-
- void hoardUnsbrk (void * ptr, long size)
- {
- #if defined(linux)
- #if USER_LOCKS
- static hoardLockType sbrkLock = 0;
- #else
- static hoardLockType sbrkLock = PTHREAD_MUTEX_INITIALIZER;
- #endif
- #endif
- #if defined(linux)
- hoardLock (sbrkLock);
- #endif
- int result = munmap ((char *) ptr, size);
- assert (result == 0);
- #if defined(linux)
- hoardUnlock (sbrkLock);
- #endif
- }
-
- #endif // CRYSTAL_HOARD && __SVR4
-
- int hoardGetPageSize (void)
- {
- return (int) sysconf(_SC_PAGESIZE);
- }
-
-
- #if defined(linux)
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <string.h>
- #endif
-
- #if defined(__sgi)
- #include <sys/types.h>
- #include <sys/sysmp.h>
- #include <sys/sysinfo.h>
- #endif
-
- int hoardGetNumProcessors (void)
- {
- #if !(defined(linux))
- #if defined(__sgi)
- return (int) sysmp(MP_NAPROCS);
- #else
- return (int) sysconf(_SC_NPROCESSORS_ONLN);
- #endif
- #else
- // Ugly workaround. Linux's sysconf indirectly calls malloc() (at
- // least on multiprocessors). So we just read the info from the
- // proc file ourselves and count the occurrences of the word
- // "processor".
-
- // We only parse the first 32K of the CPU file. By my estimates,
- // that should be more than enough for at least 64 processors.
- enum { MAX_PROCFILE_SIZE = 32768 };
- char line[MAX_PROCFILE_SIZE];
- int numProcessors = 0;
- int fd = open ("/proc/cpuinfo", O_RDONLY);
- assert (fd);
- read(fd, line, MAX_PROCFILE_SIZE);
- char * str = line;
- while (str) {
- str = strstr(str, "processor");
- if (str) {
- numProcessors++;
- str++;
- }
- }
- close (fd);
- assert (numProcessors > 0);
- return numProcessors;
- #endif
- }
-
- #endif // UNIX
-
- }
-